In [1]:
# load required libraries
options(stringsAsFactors = F)
options (repr.plot.width = 12, repr.plot.height = 5)
suppressPackageStartupMessages({
library(Seurat)
library(harmony)
library(ggplot2)
library(dplyr)
library(Matrix)
library(Hmisc)
library(ggsci)
library(viridis)
library(RColorBrewer)
library(ggrepel)
library(cowplot)
})
set.seed(123)
In [2]:
# load samples
sample_exp_220406 <- readRDS("../../1_CAD_Preprocessing/EXP220406/rds/CAD1_clean_clustered.rds")
sample_exp_220406$source <- "CAD1"
sample_exp_220406

sample_exp_230206 <- readRDS("../../1_CAD_Preprocessing/EXP230206/rds/CAD2_clean_clustered.rds")
sample_exp_230206$source <- "CAD2"
sample_exp_230206

sample_exp_220304 <- readRDS("../../1_CAD_Preprocessing/EXP220304/rds/CAD3_clean_clustered.rds")
sample_exp_220304$source <- "CAD3"
sample_exp_220304
An object of class Seurat 
33538 features across 7198 samples within 1 assay 
Active assay: RNA (33538 features, 2000 variable features)
 3 dimensional reductions calculated: pca, tsne, umap
An object of class Seurat 
33538 features across 7295 samples within 1 assay 
Active assay: RNA (33538 features, 2000 variable features)
 3 dimensional reductions calculated: pca, tsne, umap
An object of class Seurat 
33538 features across 8622 samples within 1 assay 
Active assay: RNA (33538 features, 2000 variable features)
 3 dimensional reductions calculated: pca, tsne, umap
In [3]:
# merge sample and normalize
scList <- list(sample_exp_220304, sample_exp_220406, sample_exp_230206)
sample <- merge(scList[[1]], scList[2:3])
sample <- subset(sample, nFeature_RNA >= 500 & nFeature_RNA <= 5000 & percent.mt <= 10)
sample <- NormalizeData(sample, normalization.method = "LogNormalize", scale.factor = 10000)

# find variable Genes and scale data
sample <- FindVariableFeatures(sample, selection.method = "vst")
sample <- ScaleData(sample)
sample
Warning message in CheckDuplicateCellNames(object.list = objects):
“Some cell names are duplicated across objects provided. Renaming to enforce unique cell names.”
Centering and scaling data matrix

An object of class Seurat 
33538 features across 20978 samples within 1 assay 
Active assay: RNA (33538 features, 2000 variable features)
In [4]:
# run pca and harmony
sample <- RunPCA(sample, verbose = FALSE)
sample <- RunHarmony(sample, group.by.vars = "source", verbose = FALSE)
DimPlot(sample, reduction = "harmony", pt.size = 0.1, group.by = "source")
ElbowPlot(sample, ndims = 30)
Warning message:
“Invalid name supplied, making object name syntactically valid. New object name is Seurat..ProjectDim.RNA.harmony; see ?make.names for more details on syntax validity”
No description has been provided for this image
No description has been provided for this image
In [5]:
# dimension reduction and clustering
pca_dims <- 1:30
sample <- RunTSNE(sample, dims = pca_dims, reduction = "harmony", verbose = FALSE)
sample <- RunUMAP(sample, dims = pca_dims, reduction = "harmony", verbose = FALSE)
sample <- FindNeighbors(sample, dims = pca_dims, reduction = "harmony", verbose = FALSE)
sample <- FindClusters(sample, resolution = 0.5, verbose = FALSE)
DimPlot(sample, label=TRUE, reduction = "tsne", group.by = "ident", pt.size = 0.1)
DimPlot(sample, label=TRUE, reduction = "umap", group.by = "ident", pt.size = 0.1)
Warning message:
“The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session”
No description has been provided for this image
No description has been provided for this image
In [6]:
# quality control by cluster
VlnPlot(sample, features = c("nFeature_RNA","nCount_RNA","percent.mt"), group.by = "seurat_clusters", ncol = 1, pt.size = 0.1)
ggsave("figure/qc_sample_by_cluster_CAD.pdf", width = 12, height = 10)
No description has been provided for this image
In [7]:
# add resolution
sample <- FindClusters(sample, resolution = 1, verbose = FALSE)
DimPlot(sample, label = TRUE, reduction = "umap", group.by = "ident")
DimPlot(sample, label = TRUE, reduction = "tsne", group.by = "ident")
No description has been provided for this image
No description has been provided for this image
In [8]:
# plot marker genes list
markers <- unique(c("PECAM1","VWF","CDH5","FLT1","FLI1","LMO2","CLEC14A","FABP4","IFI27","EGFL7", # EC
"CCL14","ACKR1","NRP2","NR2F2", # VEC
"HEY1","GJA5","VEGFC","SOX17", # AEC
"ABCC9", "GUCY1A2", "RGS5", "EGFLAM", # Pericyte
"CCL21", # LEC
"CLDN1", # Mesothelial
"ACTA2","MYH11","MYL9","TAGLN","TPM2","RGS5","PPP1R14A", # SMC
"ACTA2","MYH11","LUM","DCN","PPP1R14A", # MyoFB
"LUM","DCN","FBLN1","APOD","CFD","SFRP2", # FB
"PLP1","NRXN1","S100B","MPZ","GPM6B", # OLG
"CD3D","CD3E","IL7R","CXCR4","CCL5","CD52", # T
"KLRD1","NKG7","CCL5", # NK
"CD68","AIF1","MPEG1","IL1B","CXCL8","CCL3","CXCR4", # Macrophage
"CD79A","MZB1","JCHAIN","IGKC","IGHG1","IGHG2","IGHA1", # B
"TPASB1","TPSB2","CPA3","AREG","HPGD5")) # Mast
DotPlot(sample, features=rev(markers), cols=c("grey90","red3"), group.by="seurat_clusters") + theme(axis.text.x=element_text(angle=45, hjust=1))
ggsave("figure/dotplot_CAD_markers_CAD.pdf", width = 18, height = 12)
Warning message in FetchData.Seurat(object = object, vars = features, cells = cells):
“The following requested variables were not found: HPGD5, TPASB1”
No description has been provided for this image
In [9]:
# quality control by cluster
VlnPlot(sample, features = c("nFeature_RNA","nCount_RNA","percent.mt"), group.by = "seurat_clusters", ncol = 1, pt.size = 0.1)
ggsave("figure/qc_sample_by_cluster_res1.pdf", width = 15, height = 10)
No description has been provided for this image
In [10]:
# explore marker genes
FeaturePlot(sample, label = T, features=c("CCL21"), cols=c("grey90","red3"), reduction="tsne", pt.size = 0.5)
No description has been provided for this image
In [11]:
# save the sample
saveRDS(sample, file = "rds/sample_CAD.rds")
In [12]:
# remove low quality clusters
subsample <- subset(sample, idents = c(11, 12, 13, 16, 17, 19, 22, 23, 25), invert = T)
subsample <- NormalizeData(subsample, normalization.method = "LogNormalize", scale.factor = 10000)
dim(GetAssayData(subsample, slot = "counts"))

# find variable genes and scale data by number of UMIs and mitochondrial gene percentage
subsample <- FindVariableFeatures(subsample, selection.method = "vst")
length(subsample@assays$RNA@var.features)
subsample <- ScaleData(subsample)

# run pca and harmony
subsample <- RunPCA(subsample, verbose = FALSE)
subsample <- RunHarmony(subsample, group.by.vars = "source", verbose = FALSE)
DimPlot(subsample, reduction = "harmony", pt.size = 0.1, group.by = "source")
ElbowPlot(subsample, ndims = 30)
  1. 33538
  2. 18304
2000
Centering and scaling data matrix

Warning message:
“Invalid name supplied, making object name syntactically valid. New object name is Seurat..ProjectDim.RNA.harmony; see ?make.names for more details on syntax validity”
No description has been provided for this image
No description has been provided for this image
In [13]:
# dimension reduction and clustering
pca_dims <- 1:30
subsample <- RunTSNE(subsample, dims = pca_dims, reduction = "harmony", verbose = FALSE)
subsample <- RunUMAP(subsample, dims = pca_dims, reduction = "harmony", verbose = FALSE)
subsample <- FindNeighbors(subsample, dims = pca_dims, reduction = "harmony", verbose = FALSE)
subsample <- FindClusters(subsample, resolution = 0.5, verbose = FALSE)
DimPlot(subsample, label=TRUE, reduction = "tsne", group.by = "ident", pt.size = 0.1)
DimPlot(subsample, label=TRUE, reduction = "umap", group.by = "ident", pt.size = 0.1)
No description has been provided for this image
No description has been provided for this image
In [14]:
# quality control by cluster
VlnPlot(subsample, features = c("nFeature_RNA","nCount_RNA","percent.mt"), group.by = "seurat_clusters", ncol = 1, pt.size = 0.1)
ggsave("figure/qc_subsample_by_cluster_CAD.pdf", width = 10, height = 10)
No description has been provided for this image
In [15]:
# add resolution
subsample <- FindClusters(subsample, resolution = 1.5, verbose = FALSE)
DimPlot(subsample, label = TRUE, reduction = "umap", group.by = "ident")
DimPlot(subsample, label = TRUE, reduction = "tsne", group.by = "ident")
No description has been provided for this image
No description has been provided for this image
In [16]:
# plot marker genes list
markers <- unique(c("PECAM1","VWF","CDH5","FLT1","FLI1","LMO2","CLEC14A","FABP4","IFI27","EGFL7", # EC
"CCL14","ACKR1","NRP2","NR2F2", # VEC
"HEY1","GJA5","VEGFC","SOX17", # AEC
"ABCC9", "GUCY1A2", "RGS5", "EGFLAM", # Pericyte
"CCL21", # LEC
"CLDN1", # Mesothelial
"ACTA2","MYH11","MYL9","TAGLN","TPM2","RGS5","PPP1R14A", # SMC
"ACTA2","MYH11","LUM","DCN","PPP1R14A", # MyoFB
"LUM","DCN","FBLN1","APOD","CFD","SFRP2", # FB
"PLP1","NRXN1","S100B","MPZ","GPM6B", # OLG
"CD3D","CD3E","IL7R","CXCR4","CCL5","CD52", # T
"KLRD1","NKG7","CCL5", # NK
"CD68","AIF1","MPEG1","IL1B","CXCL8","CCL3","CXCR4", # Macrophage
"CD79A","MZB1","JCHAIN","IGKC","IGHG1","IGHG2","IGHA1", # B
"TPASB1","TPSB2","CPA3","AREG","HPGD5")) # Mast
DotPlot(subsample, features=rev(markers), cols=c("grey90","red3"), group.by="seurat_clusters") + theme(axis.text.x=element_text(angle=45, hjust=1))
ggsave("figure/dotplot_CAD_markers_subsample_CAD.pdf", width = 10, height = 12)
Warning message in FetchData.Seurat(object = object, vars = features, cells = cells):
“The following requested variables were not found: HPGD5, TPASB1”
No description has been provided for this image
In [17]:
# plot tsne and umap
a <- DimPlot(subsample, label = TRUE, reduction = "umap", group.by = "ident")
b <- DimPlot(subsample, label = TRUE, reduction = "tsne", group.by = "ident")
plot_grid(a, b, ncol = 2)
No description has been provided for this image
In [18]:
# explore marker genes
gene <- "STC1"
a <- FeaturePlot(subsample, label = F, features=gene, cols=c("grey90","red3"), reduction="tsne", pt.size = 0.1)
b <- DimPlot(subsample, label = TRUE, reduction = "tsne", group.by = "ident")
plot_grid(a, b, ncol = 2)

c <- FeaturePlot(subsample, label = F, features=gene, cols=c("grey90","red3"), reduction="umap", pt.size = 0.1)
d <- DimPlot(subsample, label = TRUE, reduction = "umap", group.by = "ident")
plot_grid(c, d, ncol = 2)
No description has been provided for this image
No description has been provided for this image
In [19]:
# save the subsample
saveRDS(subsample, file = "rds/subsample_CAD.rds")
In [20]:
# celltype assignment
cluster = c(0:24)
celltype = c("EC","PC","FB","SMC","EC","SMC","EC","EC","T","Mac",
            "EC","T","EC","MC","T","T","EC","T","SMC","SMC","PC",
            "Mac","LEC","GC","GC")
subsample$celltype = plyr::mapvalues(x=Idents(subsample), from=cluster, to=celltype)
a <- DimPlot(subsample, label = TRUE, reduction = "umap", group.by = "celltype")
b <- DimPlot(subsample, label = TRUE, reduction = "tsne", group.by = "celltype")
plot_grid(a, b, ncol = 2)
No description has been provided for this image
In [21]:
# refine the NKT and FB-MyoFB cell
subsample <- FindClusters(subsample, resolution = 2.5, verbose = FALSE)
DimPlot(subsample, label = TRUE, reduction = "tsne", group.by = "ident")
subsample$celltype <- as.character(subsample$celltype)
# subsample@meta.data[colnames(subset(subsample, idents = c(25))), "celltype"] <- "NKT"
subsample@meta.data[colnames(subset(subsample, idents = c(26))), "celltype"] <- "FB"
a <- DimPlot(subsample, label = TRUE, reduction = "umap", group.by = "celltype")
b <- DimPlot(subsample, label = TRUE, reduction = "tsne", group.by = "celltype")
plot_grid(a, b, ncol = 2)
subsample <- FindClusters(subsample, resolution = 1.5, verbose = FALSE)
No description has been provided for this image
No description has been provided for this image
In [22]:
# save the annotated subsample
saveRDS(subsample, file = "rds/subsample_CAD_annotated_level1.rds")
In [23]:
# calculate markers
markers_cluster <- FindAllMarkers(subsample, only.pos = T, verbose = F)
write.csv(markers_cluster, "meta/markers_CAD_by_cluster.csv")
In [24]:
# calculate celltype markers
subsample$celltype <- factor(subsample$celltype, levels = c("VEC","CEC","AEC","LEC","MC","FB","SMC","PC1","PC2","Mac","T","NKT","GC1","GC2"))
Idents(subsample) <- subsample$celltype
markers_celltype <- FindAllMarkers(subsample, only.pos = T, verbose = F)
write.csv(markers_celltype, "meta/markers_CAD_by_celltype.csv")
In [25]:
# list the session info
sessionInfo()
R version 4.3.1 (2023-06-16)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 18.04.5 LTS

Matrix products: default
BLAS/LAPACK: /data/zju/ty/miniconda/envs/singlecell/lib/libopenblasp-r0.3.3.so;  LAPACK version 3.8.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=zh_CN.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=zh_CN.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=zh_CN.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=zh_CN.UTF-8 LC_IDENTIFICATION=C       

time zone: Asia/Shanghai
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] cowplot_1.1.1      ggrepel_0.9.4      RColorBrewer_1.1-3 viridis_0.6.4     
 [5] viridisLite_0.4.2  ggsci_3.0.0        Hmisc_5.1-1        Matrix_1.6-1.1    
 [9] dplyr_1.1.4        sctransform_0.4.0  ggplot2_3.5.1      harmony_0.1.1     
[13] Rcpp_1.0.11        SeuratObject_4.1.4 Seurat_4.4.0      

loaded via a namespace (and not attached):
  [1] rstudioapi_0.17.1      jsonlite_1.8.7         magrittr_2.0.3        
  [4] spatstat.utils_3.1-1   rmarkdown_2.25         farver_2.1.1          
  [7] ragg_1.2.5             vctrs_0.6.4            ROCR_1.0-11           
 [10] spatstat.explore_3.2-3 base64enc_0.1-3        htmltools_0.5.6.1     
 [13] Formula_1.2-5          parallelly_1.36.0      KernSmooth_2.23-22    
 [16] htmlwidgets_1.6.2      ica_1.0-3              plyr_1.8.9            
 [19] plotly_4.10.2          zoo_1.8-12             uuid_1.1-1            
 [22] igraph_1.5.1           mime_0.12              lifecycle_1.0.3       
 [25] pkgconfig_2.0.3        R6_2.5.1               fastmap_1.1.1         
 [28] fitdistrplus_1.1-11    future_1.33.0          shiny_1.7.5           
 [31] digest_0.6.33          colorspace_2.1-0       patchwork_1.3.0.9000  
 [34] tensor_1.5             irlba_2.3.5.1          textshaping_0.4.0     
 [37] labeling_0.4.3         progressr_0.14.0       fansi_1.0.5           
 [40] spatstat.sparse_3.0-2  httr_1.4.7             polyclip_1.10-6       
 [43] abind_1.4-5            compiler_4.3.1         withr_2.5.1           
 [46] htmlTable_2.4.1        backports_1.4.1        MASS_7.3-60           
 [49] tools_4.3.1            foreign_0.8-85         lmtest_0.9-40         
 [52] httpuv_1.6.11          future.apply_1.11.0    nnet_7.3-19           
 [55] goftest_1.2-3          glue_1.6.2             nlme_3.1-163          
 [58] promises_1.2.1         grid_4.3.1             pbdZMQ_0.3-10         
 [61] checkmate_2.2.0        Rtsne_0.16             cluster_2.1.4         
 [64] reshape2_1.4.4         generics_0.1.3         gtable_0.3.4          
 [67] spatstat.data_3.0-1    tidyr_1.3.1            data.table_1.14.8     
 [70] sp_2.1-0               utf8_1.2.3             spatstat.geom_3.2-5   
 [73] RcppAnnoy_0.0.21       RANN_2.6.1             pillar_1.9.0          
 [76] stringr_1.5.0          IRdisplay_1.1          later_1.3.1           
 [79] splines_4.3.1          lattice_0.21-9         survival_3.5-7        
 [82] deldir_1.0-9           tidyselect_1.2.0       miniUI_0.1.1.1        
 [85] pbapply_1.7-2          knitr_1.44             gridExtra_2.3         
 [88] scattermore_1.2        xfun_0.40              matrixStats_1.0.0     
 [91] stringi_1.7.12         lazyeval_0.2.2         evaluate_0.22         
 [94] codetools_0.2-19       tibble_3.2.1           cli_3.6.3             
 [97] uwot_0.1.16            IRkernel_1.3.2         rpart_4.1.21          
[100] systemfonts_1.1.0      xtable_1.8-4           reticulate_1.34.0     
[103] repr_1.1.6             munsell_0.5.0          globals_0.16.2        
[106] spatstat.random_3.1-6  png_0.1-8              parallel_4.3.1        
[109] ellipsis_0.3.2         listenv_0.9.0          scales_1.3.0          
[112] ggridges_0.5.4         leiden_0.4.3           purrr_1.0.2           
[115] crayon_1.5.2           rlang_1.1.4           
In [ ]: